
// Description:  This creates a dataset containing the case total per year from 2008FY-2017FY.

clear

cd "~/Documents/Research/data/malpractice case data/North Carolina"

import excel "~/Documents/Research/data/malpractice case data/North Carolina/PublicMalpracticePayments-MRichards.xlsx", sheet("Judgments") firstrow

save judgements.dta, replace

clear
import excel "~/Documents/Research/data/malpractice case data/North Carolina/PublicMalpracticePayments-MRichards.xlsx", sheet("All Other Payment Types") firstrow

append using judgements

//Initial number of observations

egen nmiss = rowmiss(LastName FirstName LicenseType DateofIncident PaymentMonth PaymentDay PaymentYear LicenseeResponse IncidentCity IncidentState AreaofPractice)
drop if nmiss == 11

gen nt = _N
display nt //2,768

//Limit geography to NC

replace IncidentState = lower(IncidentState)
keep if IncidentState=="nc" | IncidentState=="north carolina"

replace nt = _N
display nt //962

//Limit year of incident from 2008-2017

gen incidentYrChar=substr(DateofIncident,7,4)
gen incidentYr=real(incidentYrChar)
drop incidentYrChar

keep if inrange(incidentYr, 2007, 2017)

replace nt = _N
display nt //629

// Rename year variable

gen incidentMoChar=substr(DateofIncident,1,2)
gen month=real(incidentMoChar)

gen     part =.
replace part =1 if month <=9
replace part =2 if month > 9 & month <=12

gen FisYear=.
replace FisYear=2008 if (incidentYr==2007 & part==2) | (incidentYr==2008 & part==1)
replace FisYear=2009 if (incidentYr==2008 & part==2) | (incidentYr==2009 & part==1)
replace FisYear=2010 if (incidentYr==2009 & part==2) | (incidentYr==2010 & part==1)
replace FisYear=2011 if (incidentYr==2010 & part==2) | (incidentYr==2011 & part==1)
replace FisYear=2012 if (incidentYr==2011 & part==2) | (incidentYr==2012 & part==1)
replace FisYear=2013 if (incidentYr==2012 & part==2) | (incidentYr==2013 & part==1)
replace FisYear=2014 if (incidentYr==2013 & part==2) | (incidentYr==2014 & part==1)
replace FisYear=2015 if (incidentYr==2014 & part==2) | (incidentYr==2015 & part==1)
replace FisYear=2016 if (incidentYr==2015 & part==2) | (incidentYr==2016 & part==1)
replace FisYear=2017 if (incidentYr==2016 & part==2) | (incidentYr==2017 & part==1)

drop if FisYear==.

// Limit dataset to only the variables we need

keep FisYear

sort FisYear
by FisYear: gen tot_cases = _N

duplicates drop

// Save dataset

save "~/Desktop/mal_cases_08to17", replace
